library(sf)
library(tidyverse)
library(ggplot2)
library(readxl)
library(plotly)
library(dplyr)
library(foreign)
library(ggiraph)
library(patchwork)
library(gender)
library (osmdata)
library(httr)
library(xmlr)
library(xml2)
library(jsonlite)
library(tidytext)
library(tm)
library(SnowballC)
library(wordcloud)
# convert time at residence/in Savannah to double
demographic_data <- read_csv("Data1_DemographicData.csv", show_col_types = FALSE) %>%
mutate (ID = `ID Number`,
residence_time = as.double(str_extract(`How long have you lived at your residence?`, "[^ years]*")),
savannah_time = as.double(str_extract(`How long have you lived in Savannah?`, "[^ years]*") )) %>%
select(-`How long have you lived at your residence?`, -`How long have you lived in Savannah?`)
demographic_data_for_table <- select(demographic_data, -Timestamp, -ID, -`ID Number`, -City, -State, - `Cross Street`, -`Accuracy`, -`Longitude`, -`Latitude`, -`How many people live in your home with you?`)
print_stats <- function(variable) {
if (is.character(demographic_data[[variable]]))
{
factor_version <- factor(demographic_data[[variable]])
print(paste(variable, " MEAN"))
print(mean(as.numeric(factor_version) - 1))
counts <- demographic_data %>% group_by_at(variable) %>%
summarize(count=n(), .groups="drop") %>%
mutate(percent = 100*(count/nrow(demographic_data)))
print(counts)
} else {
print(paste(variable," STATS"))
print(paste(mean(demographic_data[[variable]]) , " & ", min(demographic_data[[variable]]) , "-" , max(demographic_data[[variable]]) , " (st. dev. = " , sd(demographic_data[[variable]]) , ")", sep=""))
}
}
nrow(demographic_data)
## [1] 75
for (variable in names(demographic_data_for_table))
{
print_stats(variable)
}
## [1] "Do you own or have access to a car? MEAN"
## [1] 0.84
## # A tibble: 2 × 3
## `Do you own or have access to a car?` count percent
## <chr> <int> <dbl>
## 1 No 12 16
## 2 Yes 63 84
## [1] "Do you drive? MEAN"
## [1] 0.92
## # A tibble: 2 × 3
## `Do you drive?` count percent
## <chr> <int> <dbl>
## 1 No 6 8
## 2 Yes 69 92
## [1] "HHSize STATS"
## [1] "2.41333333333333 & 1-6 (st. dev. = 1.39587552040591)"
## [1] "What is your age range? MEAN"
## [1] 1.333333
## # A tibble: 4 × 3
## `What is your age range?` count percent
....
social_network_data <- read_excel("Data2_SocialNetworkData_CA.xlsx")
open_ended_data <- read_excel("Data3_OpenResponse.xlsx")
# Group the "Friend" and "Frienship" category together
gis_data <- read.dbf("AllPoints_V11.dbf", as.is=TRUE) %>%
mutate(RELATIONSH = ifelse(RELATIONSH=="Friend", "Friendship", RELATIONSH))
gis_data
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
all_data <- left_join(left_join(demographic_data, social_network_data, by="ID Number"), open_ended_data, by="ID Number")
all_data
## # A tibble: 75 × 32
## Timestamp `ID Number` City State `Cross Street` Accuracy Latitude Longitude
## <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 8/22/2023… 10025 Sava… Geor… Montgomery cr… interse… 32.0 -81.1
## 2 8/22/2023… 10028 Sava… Geor… 512 Abercorn … Rooftop 32.1 -81.1
## 3 9/26/2023… 10029 Sava… Geor… Reynolds & wa… Cross S… 32.0 -81.1
## 4 7/12/2023… 10038 Sava… Geor… 917 Bubbedge … Rooftop 32.1 -81.1
## 5 8/22/2023… 10039 Sava… Geor… 7345 Grant St Rooftop 32.0 -81.1
## 6 9/19/2023… 10040 Sava… Geor… 208 Ford aven… Street … 32.0 -81.3
## 7 8/16/2023… 10041 Sava… Geor… Johnny Mercer… Street … 32.0 -81.0
## 8 9/19/2023… 10042 Sava… Geor… Park & Harmon Street … 32.1 -81.1
## 9 7/18/2023… 10043 Sava… Geor… Skylark Road,… Street … 32.1 -81.3
## 10 7/12/2023… 10044 Sava… Geor… 313 West Hall… interse… 32.1 -81.1
## # ℹ 65 more rows
## # ℹ 24 more variables: `Do you own or have access to a car?` <chr>,
## # `Do you drive?` <chr>, `How many people live in your home with you?` <chr>,
## # HHSize <dbl>, `What is your age range?` <chr>,
## # `What gender do you identify with?` <chr>,
## # `What race/ethnicity do you identify with?` <chr>,
## # `Disabled Community?` <chr>, `LGBTQ+ Community?` <chr>, ID <dbl>, …
....
poi_counts_by_relationships <- gis_data %>%
group_by(RELATIONSH) %>%
summarize (poi_count = n()) %>%
select(RELATIONSH, poi_count)
poi_counts_by_relationships
## # A tibble: 5 × 2
## RELATIONSH poi_count
## <chr> <int>
## 1 Family 573
## 2 Friendship 398
## 3 Other 50
## 4 Professional 45
## 5 Romantic 118
sav_map <- read_sf("ZIP_Codes.geojson")
map_gender_relationships <- function(participant_gender, relationship_gender) {
if (participant_gender == "Female" && relationship_gender == "F")
{
"F-F"
} else if (participant_gender == "Male" && relationship_gender == "M") {
"M-M"
} else if (participant_gender == "Female" && relationship_gender == "M")
{
"F-M"
} else if (participant_gender == "Male" && relationship_gender == "F")
{
"M-F"
} else if (relationship_gender == "M")
{
"N-M"
} else if (relationship_gender == "F") {
"N-F"
} else if (participant_gender == "Female") {
"F-U"
} else if (participant_gender == "Male") {
"M-U"
} else {
"N-U"
}
}
map_gender_guess <- function(relationship_gender, person) {
name <- unlist(strsplit(person, split=' '))[1]
name <- unlist(strsplit(name, split=')'))[1]
guess <- gender(name, method="ssa")
if (nrow(guess) > 0 && relationship_gender == "Undefined from relationship")
{
if (guess$proportion_female > 0.99)
{
"F"
} else if (guess$proportion_male > 0.99) {
"M"
} else {
relationship_gender
}
}
else {
relationship_gender
}
}
gis_demographic_data <- gis_data %>%
left_join(select(demographic_data, ID, `What gender do you identify with?`),
by = c("SOCIALNE_1" = "ID"))
gis_demographic_data
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
gis_gender_data <- gis_demographic_data %>%
mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, GENDER), map_gender_relationships)))
gis_gender_data_guess <- gis_demographic_data %>%
mutate(gender_guess = unlist(pmap(list(GENDER, PERSON_WIT), map_gender_guess))) %>%
mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))
gis_gender_data_guess
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
library(RColorBrewer)
gis_gender_data_guess_filtered <- gis_gender_data_guess %>% filter(LAT > 31.0 & LAT < 32.7)
gis_gender_data_guess %>%
ggplot(aes(x=RELATIONSH, fill=relationship_gender)) +
scale_fill_brewer(palette = "Dark2") +
geom_bar(position = "dodge") +
theme(axis.text.x = element_text(angle = 90, vjust = 0))
p1 <- gis_gender_data_guess_filtered %>%
filter(relationship_gender=="F-F" | relationship_gender=="F-M" | relationship_gender=="M-F" | relationship_gender=="M-M") %>%
ggplot(aes(x=RELATIONSH, fill=relationship_gender, data_id=relationship_gender)) +
scale_fill_brewer(palette = "Dark2") +
geom_bar_interactive(position = "dodge") + theme(axis.text.x = element_text(angle = 90, vjust = 0))
p2 <- sav_map %>%
ggplot() +
geom_sf() +
geom_point_interactive(data=gis_gender_data_guess_filtered %>% filter(relationship_gender == "F-F" | relationship_gender == "F-M" |relationship_gender == "M-F" | relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, data_id=relationship_gender, tooltip = UNIQUE)) +
scale_color_brewer(palette = "Dark2") + theme(axis.text.x = element_text(angle = 90, vjust = 0))
hover_css <- "
filter: brightness(75%);
cursor: pointer;
transition: all 0.5s ease-out;
filter: brightness(1.15);
"
combined_plot <- p1 / p2 + plot_layout(nrow = 2)
interactive_plot <- girafe(ggobj = combined_plot) %>%
girafe_options(
opts_hover(css = hover_css),
opts_hover_inv(css = "opacity:0.1; transition: all 0.2s ease-out;"),
opts_sizing(rescale = TRUE)
)
interactive_plot
htmltools::save_html(interactive_plot, "interactive_map.html")
total_participants <- nrow(demographic_data)-1
f_f <- sum(gis_gender_data_guess$relationship_gender == "F-F")
m_m <- sum(gis_gender_data_guess$relationship_gender == "M-M")
m_f <- sum(gis_gender_data_guess$relationship_gender == "M-F")
f_m <- sum(gis_gender_data_guess$relationship_gender == "F-M")
reported_relationships <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))
dimnames(reported_relationships) <- list(gender_relationship = c("M", "F"),
gender_participant = c("M", "F"))
Xsq <- chisq.test(reported_relationships)
Xsq$observed # observed counts (same as M)
## gender_participant
## gender_relationship M F
## M 82 71
## F 307 567
Xsq$expected # expected counts under the null
## gender_participant
## gender_relationship M F
## M 57.95229 95.04771
## F 331.04771 542.95229
Xsq$residuals # Pearson residuals
## gender_participant
## gender_relationship M F
## M 3.158919 -2.466624
## F -1.321687 1.032031
Xsq$stdres # standardized residuals
## gender_participant
## gender_relationship M F
## M 4.344531 -4.344531
## F -4.344531 4.344531
Xsq$p.value
## [1] 2.097986e-05
mosaicplot(reported_relationships, las = 1, shade=TRUE)
spend_time <- read_csv("Spend_Time_CLOSED_Coding.csv", show_col_types = FALSE)
more_places <- read_csv("More_Places_CLOSED_Coding.csv", show_col_types = FALSE)
spend_time_merged <- spend_time %>%
left_join(select(demographic_data, ID, `What gender do you identify with?`), by = "ID")
more_places_merged <- more_places %>%
left_join(select(demographic_data, ID, `What gender do you identify with?`), by = "ID")
filter(spend_time_merged, is.na(Code))
## # A tibble: 0 × 6
## # ℹ 6 variables: ID <dbl>, Response <chr>, Person <chr>, Code <chr>,
## # Notes <chr>, What gender do you identify with? <chr>
spend_time_guess <- spend_time_merged %>%
mutate(gender_guess = unlist(pmap(list(Code, Person), map_gender_guess))) %>%
mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))
spend_time_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender")
spend_time_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender (Guess)")
spend_time_guess
## # A tibble: 81 × 8
## ID Response Person Code Notes What gender do you i…¹ gender_guess
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 10025 Her cousins (Cous… U <NA> Female U
## 2 10028 Haley. She has … Haley F <NA> Female F
## 3 10029 My old coworker… (Form… U <NA> Female U
## 4 10038 Her Fiance Char… Charl… M <NA> Female M
## 5 10040 Grandkids- they… (Gran… U <NA> Female U
## 6 10041 Family (Mother … (Moth… F <NA> Female F
## 7 10041 Family (Mother … (Fath… M <NA> Female M
## 8 10042 Grandparents- T… (Gran… U <NA> Female U
## 9 10043 Her sister, Nic… Nicol… F <NA> Female F
## 10 10044 Would like to r… Miche… Unde… <NA> Female F
## # ℹ 71 more rows
## # ℹ abbreviated name: ¹​`What gender do you identify with?`
## # ℹ 1 more variable: relationship_gender <chr>
total_participants <- nrow(demographic_data)-1
f_f <- sum(spend_time_guess$relationship_gender == "F-F")
m_m <- sum(spend_time_guess$relationship_gender == "M-M")
m_f <- sum(spend_time_guess$relationship_gender == "M-F")
f_m <- sum(spend_time_guess$relationship_gender == "F-M")
spend_time_table <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))
dimnames(spend_time_table) <- list(gender_relationship = c("M", "F"),
gender_participant = c("M", "F"))
Xsq <- chisq.test(spend_time_table)
Xsq$observed # observed counts (same as M)
## gender_participant
## gender_relationship M F
## M 1 5
## F 14 31
Xsq$expected # expected counts under the null
## gender_participant
## gender_relationship M F
## M 1.764706 4.235294
## F 13.235294 31.764706
Xsq$residuals # Pearson residuals
## gender_participant
## gender_relationship M F
## M -0.5756497 0.3715803
## F 0.2101975 -0.1356819
Xsq$stdres # standardized residuals
## gender_participant
## gender_relationship M F
## M -0.7294087 0.7294087
## F 0.7294087 -0.7294087
Xsq$p.value
## [1] 0.8006642
mosaicplot(spend_time_table, las = 1, shade=TRUE)
filter(more_places_merged, is.na(Gender))
## # A tibble: 0 × 5
## # ℹ 5 variables: ID <dbl>, Response <chr>, Person <chr>, Gender <chr>,
## # What gender do you identify with? <chr>
more_places_guess <- more_places_merged %>%
mutate(gender_guess = unlist(pmap(list(Gender, Person), map_gender_guess))) %>%
mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))
more_places_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender")
more_places_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender (Guess)")
f_f <- sum(more_places_guess$relationship_gender == "F-F")
m_m <- sum(more_places_guess$relationship_gender == "M-M")
m_f <- sum(more_places_guess$relationship_gender == "M-F")
f_m <- sum(more_places_guess$relationship_gender == "F-M")
more_places_table <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))
dimnames(more_places_table) <- list(gender_relationship = c("M", "F"),
gender_participant = c("M", "F"))
Xsq <- chisq.test(more_places_table)
Xsq$observed # observed counts (same as M)
## gender_participant
## gender_relationship M F
## M 2 5
## F 16 35
Xsq$expected # expected counts under the null
## gender_participant
## gender_relationship M F
## M 2.172414 4.827586
## F 15.827586 35.172414
Xsq$residuals # Pearson residuals
## gender_participant
## gender_relationship M F
## M -0.11697707 0.07847060
## F 0.04333758 -0.02907173
Xsq$stdres # standardized residuals
## gender_participant
## gender_relationship M F
## M -0.1502151 0.1502151
## F 0.1502151 -0.1502151
Xsq$p.value
## [1] 1
mosaicplot(more_places_table, las = 1, shade=TRUE)
lookup_osm <- function (address, lat, lon) {
url <- paste("http://nominatim.openstreetmap.org/search?format=json&q=", gsub(" ", "+", address), "&format=json", sep="")
place_type <- tryCatch({
res <- GET(url)
data <- content(res,"text")
if (!is.null(data)) {
place_data <- jsonlite::fromJSON(data)
toString(place_data$type)
}
}, error = function(e) {
"ERROR"
})
if (is.null(place_type) || place_type == "ERROR" )
{
url <- paste("http://nominatim.openstreetmap.org/reverse?format=json&lat=", lat, "&lon=", lon, "&zoom=18&addressdetails=1", sep="")
place_type <- tryCatch({
res <- GET(url)
data <- content(res,"text")
if (!is.null(data)) {
place_data <- jsonlite::fromJSON(data)
place_data$type[1]
}
}, ERROR = function(e) {
"ERROR"
})
}
place_type
}
This is an example of how to run the above function, I chunked the data because otherwise it would time out occasionally. The rest of the notebook imports the osm_data.csv file, which has been computed using this code.
# replace the above line with ```{r} if you want to run this chunk
osm_data <- head(gis_gender_data_guess,1) %>%
mutate(place_type = unlist(pmap(list(ADDRESS_1, LAT, LON), lookup_osm)))
osm_data
chunks <- ggplot2::cut_interval(1:nrow(gis_gender_data_guess), length=20, labels=FALSE)
unique(chunks)
for (i in unique(chunks))
{
new_chunk <- gis_gender_data_guess[which(chunks==i),] %>%
mutate(place_type = unlist(pmap(list(ADDRESS_1, LAT, LON), lookup_osm)))
osm_data <- rbind(osm_data, new_chunk)
}
osm_data
osm_data <- read.csv("osm_data.csv")
osm_data
## X.1 X SOCIALNETW CHECK
## 1 1 1 1025 Correct
## 2 2 2 1319 Correct
## 3 3 3 1320 Correct
## 4 4 4 1321 Correct
## 5 5 5 1322 Correct
## 6 6 6 459 Correct
## 7 7 7 460 Correct
## 8 8 8 87 Correct
## 9 9 9 88 Correct
## 10 10 10 89 Correct
## 11 11 11 486 Different from Google - needs to be re-geocoded
## 12 12 12 487 Correct
## 13 13 13 488 Correct
## 14 14 14 926 Correct
## 15 15 15 927 Clio Updated
## 16 16 16 928 Correct
## 17 17 17 995 Correct
## 18 18 18 996 Correct
## 19 19 19 997 <NA>
....
library(RColorBrewer)
filtered_osm <- osm_data %>%
filter(place_type != "yes" & place_type != "unclassified" & place_type != "primary" & place_type != "secondary" & place_type != "tertiary" & place_type != "parking") %>%
mutate(place_type = ifelse(place_type=="residential" | place_type=="house", "residential or house", place_type),
RELATIONSH = ifelse(RELATIONSH=="Friend", "Friendship", RELATIONSH)) %>%
filter(place_type != "residential or house")
top_7_place_types <- filtered_osm %>%
count(place_type, sort = TRUE) %>%
slice(1:7) %>% left_join(filtered_osm)
p1 <- top_7_place_types %>%
ggplot(aes(x=RELATIONSH, fill=place_type, data_id=place_type)) +
scale_fill_brewer(palette = "Dark2") +
geom_bar_interactive(position = "dodge") +
theme(axis.text.x = element_text(angle = 90, vjust = 0))
p2 <- sav_map %>%
ggplot() +
geom_sf() +
geom_point_interactive(data=top_7_place_types, alpha=0.5, aes(x=LON, y=LAT, color=place_type, data_id=place_type, tooltip = UNIQUE)) +
scale_color_brewer(palette = "Dark2") +
theme(axis.text.x = element_text(angle = 90, vjust = 0))
hover_css <- "
filter: brightness(75%);
cursor: pointer;
transition: all 0.5s ease-out;
filter: brightness(1.15);
"
combined_plot <- p1 / p2 + plot_layout(nrow = 2)
interactive_plot <- girafe(ggobj = combined_plot) %>%
girafe_options(
opts_hover(css = hover_css),
opts_hover_inv(css = "opacity:0.1; transition: all 0.2s ease-out;"),
opts_sizing(rescale = TRUE)
)
interactive_plot
htmltools::save_html(interactive_plot, "interactive_map2.html")
gis_gender_data %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender")
gis_gender_data_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender (Guess)")
gis_gender_data_guess %>%
ggplot(aes(x=relationship_gender)) +
geom_bar(fill="darkblue") +
labs(title = "Relationships by Gender (Guess)")
gis_gender_data_guess_filtered <- gis_gender_data_guess %>% filter(LAT > 31.7 & LAT < 32.3 & LON> -81.4 & LON < -80.9)
gis_gender_data_guess_filtered
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_gender_data_guess_filtered %>% filter(relationship_gender == "F-F" | relationship_gender == "F-M" |relationship_gender == "M-F" | relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE)) +
facet_grid(. ~ relationship_gender)
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "F-F"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "F-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "M-F"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))
# Difficult to assume younger-older or older-younger for most relationships in the data
map_age_relationships <- function(relationship) {
if (!is.na(relationship))
{
if (relationship == "Aunt" | relationship == "Father" | relationship == "Grandparent" | relationship == "Mother" | relationship == "Uncle" | relationship == "In Law")
{
"Y - O"
} else if (relationship == "Daughter" | relationship == "Son" | relationship == "Niece" | relationship == "Nephew" | relationship == "Grandchild")
{
"O - Y"
} else if (relationship == "Family" | relationship == "Cousin" | relationship == "Husband" | relationship == "Wife" | relationship == "Brother" | relationship == "Sister" | relationship == "Boyfriend" |
relationship == "Boss" | relationship == "Brother" | relationship == "Brother in Law" | relationship == "Church Relationship" | relationship == "Cousin" | relationship == "Coworker" | relationship == "Ex Partner" | relationship == "Friend" | relationship == "Group" | relationship == "Partner" | relationship == "Pastor" ){
"S - S"
}
} else {
"UNDEFINED"
}
}
gis_demographic_data
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
unique( gis_demographic_data$SOCIALNE_3)
## [1] "Aunt" "Boss" "Boyfriend"
## [4] "Brother" "Brother in Law" "Church Relationship"
## [7] "Cousin" "Coworker" "Daughter"
## [10] "Ex Partner" "Family" "Father"
## [13] "Friend" "Grandchild" "Grandparent"
## [16] "Group" "Husband" "In Law"
## [19] "Mother" "Nephew" "Niece"
## [22] "Partner" "Pastor" "Sister"
## [25] "Son" "Uncle" "Wife"
## [28] NA
gis_age_data_guess <- gis_demographic_data %>%
mutate(age_guess = unlist(pmap(list(SOCIALNE_3), map_age_relationships)))
# one small discovery: no "girlfriend" relationship in the dataset
filter(gis_age_data_guess, gis_age_data_guess$SOCIALNE_3 == "Girlfriend")
## [1] SOCIALNETW CHECK
## [3] SOCIALNE_1 SOCIALNE_2
## [5] PERSON_WIT SOCIALNE_3
## [7] NUMBER_OF_ RELATIONSH
## [9] GENDER RELATION_1
## [11] LONG_LIST SOCIALNE_4
## [13] SOCIALNE_5 SOCIALNE_6
## [15] SOCIALNE_7 UNIQUE
## [17] OBJECTID FIELD1
## [19] ADDRESS PRECISION
## [21] LAT LON
## [23] ADDRESS_1 What gender do you identify with?
## [25] age_guess
## <0 rows> (or 0-length row.names)
gis_age_data_guess_filtered <- gis_age_data_guess %>% filter(LAT > 31.7 & LAT < 32.3 & LON> -81.4 & LON < -80.9)
gis_age_data_guess_filtered
## SOCIALNETW CHECK SOCIALNE_1
## 1 1025 Correct 10071
## 2 1319 Correct 10095
## 3 1320 Correct 10095
## 4 1321 Correct 10095
## 5 1322 Correct 10095
## 6 459 Correct 10049
## 7 460 Correct 10049
## 8 87 Correct 10048
## 9 88 Correct 10048
## 10 89 Correct 10048
## 11 486 Different from Google - needs to be re-geocoded 10064
## 12 487 Correct 10064
## 13 488 Correct 10064
## 14 926 Correct 10072
## 15 927 Clio Updated 10072
## 16 928 Correct 10072
## 17 995 Correct 10071
## 18 996 Correct 10071
## 19 997 <NA> 10071
....
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_age_data_guess_filtered %>% filter(age_guess != "UNDEFINED"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE)) +
facet_grid(. ~ age_guess)
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_age_data_guess %>% filter(age_guess == "Y - O"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_age_data_guess %>% filter(age_guess == "O - Y"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))
sav_map %>%
ggplot() +
geom_sf() +
geom_point(data=gis_age_data_guess %>% filter(age_guess == "S - S"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))
all_data %>%
ggplot(aes(x=`What is your age range?`)) +
geom_bar(fill="darkblue") +
labs(title = "Participants by Age Range",
x = "Age Range", y = "Number of Participants")
all_data$`What race/ethnicity do you identify with?`
## [1] "Black / African American" "Other"
## [3] "White" "Black / African American"
## [5] "Black / African American" "Black / African American"
## [7] "White" "Other"
## [9] "Black / African American" "White"
## [11] "White" "Black / African American"
## [13] "Black / African American" "White"
## [15] "Black / African American" "Black / African American"
## [17] "Black / African American" "Black / African American"
## [19] "Black / African American" "Black / African American"
## [21] "White" "Black / African American"
## [23] "Black / African American" "Black / African American"
## [25] "White" "Black / African American"
## [27] "White" "Other"
## [29] "Black / African American" "Black / African American"
## [31] "Black / African American" "Black / African American"
## [33] "Black / African American" "White"
## [35] "Black / African American" "Black / African American"
## [37] "Black / African American" "Other"
## [39] "White" "Black / African American"
....
all_data %>%
ggplot(aes(x=`What race/ethnicity do you identify with?`)) +
geom_bar(fill="darkblue") +
labs(title = "Participants by Race",
x = "Gender", y = "Number of Participants")
all_data %>%
ggplot(aes(fill=`Disabled Community?`, x=`What is your age range?`)) +
geom_bar() +
labs(title = "Participants by Age Range",
x = "Age Range", y = "Number of Participants")
all_data %>%
ggplot(aes(fill=`LGBTQ+ Community?`, x=`What is your age range?`)) +
geom_bar() +
labs(title = "Participants by Age Range",
x = "Age Range", y = "Number of Participants")
# Exploratory data analysis: car ownership
all_data %>%
ggplot(aes(x=`What is your age range?`, fill=`Do you drive?`)) +
geom_bar() +
labs(title = "Participants by Age Range, Grouped by Driving",
x = "Age Range", y = "Number of Participants")
all_data %>%
ggplot(aes(x=`What is your age range?`, fill=`Do you own or have access to a car?`)) +
geom_bar() +
labs(title = "Participants by Age Range, Grouped by Car Ownership",
x = "Age Range", y = "Number of Participants")
all_data %>%
ggplot(aes(x=`What race/ethnicity do you identify with?`, fill=`Do you drive?`)) +
geom_bar() +
labs(title = "Participants by Age Range, Grouped by Driving",
x = "Age Range", y = "Number of Participants")
all_data %>%
ggplot(aes(x=`What race/ethnicity do you identify with?`, fill=`Do you own or have access to a car?`)) +
geom_bar() +
labs(title = "Participants by Age Range, Grouped by Car Ownership",
x = "Age Range", y = "Number of Participants")
race_and_driving_table <- table(all_data$`What race/ethnicity do you identify with?`, all_data$`Do you drive?`)
mosaicplot(race_and_driving_table, las = 1, shade=TRUE, main="Participants by Race and Driving")
race_and_car_table <- table(all_data$`What race/ethnicity do you identify with?`, all_data$`Do you own or have access to a car?`)
mosaicplot(race_and_car_table, las = 1, shade=TRUE, main="Participants by Race and Car Ownership")
age_and_driving_table <- table(all_data$`What is your age range?`, all_data$`Do you drive?`)
mosaicplot(age_and_driving_table, las = 1, shade=TRUE, main="Participants by Age and Driving")
age_and_car_table <- table(all_data$`What is your age range?`, all_data$`Do you own or have access to a car?`)
mosaicplot(age_and_car_table, las = 1, shade=TRUE, main="Participants by Age and Car Ownership")
disability_and_driving_table <- table(all_data$`Disabled Community?`, all_data$`Do you drive?`)
mosaicplot(disability_and_driving_table, las = 1, shade=TRUE, main="Participants by Disability and Driving")
disability_and_car_table <- table(all_data$`Disabled Community?`, all_data$`Do you own or have access to a car?`)
mosaicplot(disability_and_driving_table, las = 1, shade=TRUE, main="Participants by Disability and Car Ownership")
chisq.test(race_and_driving_table)
##
## Pearson's Chi-squared test
##
## data: race_and_driving_table
## X-squared = 1.5951, df = 2, p-value = 0.4504
chisq.test(race_and_car_table)
##
## Pearson's Chi-squared test
##
## data: race_and_car_table
## X-squared = 4.511, df = 2, p-value = 0.1048
chisq.test(age_and_driving_table)
##
## Pearson's Chi-squared test
##
## data: age_and_driving_table
## X-squared = 1.1706, df = 3, p-value = 0.7601
chisq.test(age_and_car_table)
##
## Pearson's Chi-squared test
##
## data: age_and_car_table
## X-squared = 1.8241, df = 3, p-value = 0.6097
chisq.test(disability_and_driving_table)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: disability_and_driving_table
## X-squared = 2.3478, df = 1, p-value = 0.1255
chisq.test(disability_and_car_table)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: disability_and_car_table
## X-squared = 3.7105, df = 1, p-value = 0.05407
age_and_disability_table <- table(all_data$`What is your age range?`, all_data$`Disabled Community?`)
mosaicplot(age_and_disability_table, las = 1, shade=TRUE, main="Participants by Disability Community and Age")
chisq.test(age_and_disability_table)
##
## Pearson's Chi-squared test
##
## data: age_and_disability_table
## X-squared = 9.4657, df = 3, p-value = 0.0237
age_and_disability_table <- table(all_data$`What is your age range?`, all_data$`LGBTQ+ Community?`)
mosaicplot(age_and_disability_table, las = 1, shade=TRUE, main="Participants by LGBTQ+ Community and Age")
chisq.test(age_and_disability_table)
##
## Pearson's Chi-squared test
##
## data: age_and_disability_table
## X-squared = 3.0683, df = 3, p-value = 0.3812
# Some issues, words with different meanings (for example, park like the place versus the verb)
data(stop_words)
comparison_cloud <- function (variable) {
responses <- all_data %>% select(`ID Number`, `Disabled Community?`, variable)
words <- responses %>% unnest_tokens(word, variable)
words <- words %>% filter(!(word %in% stop_words$word))
words_disabled_community <- words %>% filter(`Disabled Community?` == "Yes")
words_not_disabled_community <- words %>% filter(`Disabled Community?` == "No")
tdm_both <- TermDocumentMatrix(c(words_disabled_community$word, words_not_disabled_community$word), control = list(stemming = TRUE, stopwords = TRUE))
tdm_both <- as.matrix(tdm_both)
tdm_yes_sum <- rowSums(tdm_both[,1:nrow(words_disabled_community)])
tdm_no_sum <- rowSums(tdm_both[,nrow(words_disabled_community):(nrow(words_disabled_community)+nrow(words_not_disabled_community))])
tdm_both_sum <- cbind(tdm_yes_sum, tdm_no_sum)
colnames(tdm_both_sum) <- c("Disabled Community - Yes", "Disabled Community - No")
comparison.cloud(tdm_both_sum, random.order = FALSE,
colors = c("black", "red"), max.words = 100, scale=c(2.2, 0.44))
commonality.cloud(tdm_both_sum, random.order = FALSE,
colors = brewer.pal(8, "Dark2"), max.words = 100)
}
comparison_cloud("Places / Features / Activities")
comparison_cloud("First names & Relationship")
comparison_cloud("Are there other ways infrastructure changes in the city have impacted your social life?")
comparison_cloud("Has the city changed in a way that has made it easier for you to spend time with the people in your life?")
comparison_cloud("Please consider some changes to the city that have been made. Has the city changed in a way that has made it harder for you to spend time with the people in your life?")
comparison_cloud("Do you know anyone who has voiced an opinion or expressed some feelings about how they may or may not have places to go with others? Who was it and what have they said?")
all_data_counts <- all_data %>%
mutate(relationships_count = lengths(strsplit(`First names & Relationship`, split=",")), poi_count = lengths(strsplit(`Places / Features / Activities`, split=",")))
all_data_counts %>%
ggplot(aes(x=relationships_count, y=poi_count, color=`Disabled Community?`)) +
geom_point()
all_data_counts %>%
ggplot(aes(x=relationships_count, y=poi_count, color=`Do you own or have access to a car?`)) +
geom_point()
all_data_counts %>%
ggplot(aes(x=relationships_count, y=poi_count, color=`Do you drive?`)) +
geom_point()
all_data_counts %>%
ggplot(aes(x=relationships_count, y=poi_count, color=`What is your age range?`)) +
geom_point()
all_data_counts %>%
ggplot(aes(x=relationships_count, y=poi_count, color=`What race/ethnicity do you identify with?`)) +
geom_point()
count_by_demographic <- function (demographic)
{
all_data_counts %>%
group_by(across(all_of(demographic))) %>%
summarize(avg_relationships = mean(relationships_count), avg_poi = mean(poi_count))
}
by_disability_community <- count_by_demographic(c("Disabled Community?"))
by_disability_community %>%
ggplot(aes(x=`Disabled Community?`, y=avg_poi, fill=avg_relationships)) +
geom_bar(stat='identity') +
labs(title = "POI Count by Disability Community Membership")
by_car_ownership <- count_by_demographic(c("Do you own or have access to a car?"))
by_car_ownership %>%
ggplot(aes(x=`Do you own or have access to a car?`, y=avg_poi, fill=avg_relationships)) +
geom_bar(stat='identity') +
labs(title = "POI Count by Car Ownership")
by_driving <- count_by_demographic(c("Do you drive?"))
by_driving %>%
ggplot(aes(x=`Do you drive?`, y=avg_poi, fill=avg_relationships)) +
geom_bar(stat='identity') +
labs(title = "POI Count by Driving")
by_race <- count_by_demographic(c("What race/ethnicity do you identify with?"))
by_race %>%
ggplot(aes(x=`What race/ethnicity do you identify with?`, y=avg_poi, fill=avg_relationships)) +
geom_bar(stat='identity') +
labs(title = "POI Count by Race")
by_age <- count_by_demographic(c("What is your age range?"))
by_age %>%
ggplot(aes(x=`What is your age range?`, y=avg_poi, fill=avg_relationships)) +
geom_bar(stat='identity') +
labs(title = "POI Count by Age")